# Show the code
# Setup: load packages, the ISCAM workspace, and the survey data.
# NOTE: the original source pasted this entire block twice and fused several
# statements onto one line; this is the deduplicated version.
library(readr)
library(tidyverse)
library(broom)
library(corrplot)
library(MASS)
library(nlme)
library(ggeffects)
library(RColorBrewer)
load(url("http://www.rossmanchance.com/iscam3/ISCAM.RData"))
# MASS also exports select(); make sure dplyr's select() wins
select <- dplyr::select
allYrsFinal <- read_csv("FinalData/All Years Final Public - with Vars.csv")
# Keep rows with nonzero pre/post cognitive competence scores; recode a
# missing gender as the explicit level "NA/Other".
allYrsFinal_NA <- allYrsFinal |>
  filter(cognitive.competence.pre != 0,
         cognitive.competence.post != 0) |>
  mutate(gender = replace_na(gender, "NA/Other"))
allYrsFinal <- allYrsFinal |>
filter(cognitive.competence.pre != 0,
cognitive.competence.post != 0)allYrsFinal |>
group_by(gender)|>
count()# A tibble: 3 × 2
# Groups: gender [3]
gender n
<chr> <int>
1 Female 7478
2 Male 4189
3 <NA> 63
For pre questions, for women
# SATS cognitive-competence items: the pre and post version of each question.
competence_items <- c("q6e", "q7a", "q8f", "q9a", "q9b", "q9e")
competenceFull <- paste0(rep(competence_items, each = 2), c(".pre.a", ".post.a"))
# Pre-survey competence items for women, complete cases only.
pre_cols_F <- grep("pre\\.a$", competenceFull, value = TRUE)
pre_only_F <- allYrsFinal |>
  filter(gender == "Female") |>
  select(all_of(pre_cols_F)) |>
  drop_na()
cor(pre_only_F) q6e.pre.a q7a.pre.a q8f.pre.a q9a.pre.a q9b.pre.a q9e.pre.a
q6e.pre.a 1.0000000 0.4800976 0.4308480 0.3894616 0.4401435 0.6347125
q7a.pre.a 0.4800976 1.0000000 0.3946282 0.4222106 0.4515881 0.5119590
q8f.pre.a 0.4308480 0.3946282 1.0000000 0.2979830 0.3694416 0.5146106
q9a.pre.a 0.3894616 0.4222106 0.2979830 1.0000000 0.6909989 0.4012884
q9b.pre.a 0.4401435 0.4515881 0.3694416 0.6909989 1.0000000 0.4718131
q9e.pre.a 0.6347125 0.5119590 0.5146106 0.4012884 0.4718131 1.0000000
corrplot(cor(pre_only_F))For pre questions, for men
# Pre-survey competence items for men, complete cases only.
pre_only_M <- allYrsFinal |>
  filter(gender == "Male") |>
  select(all_of(competenceFull)) |>
  select(matches("pre\\.a$")) |>
  drop_na()
cor(pre_only_M) q6e.pre.a q7a.pre.a q8f.pre.a q9a.pre.a q9b.pre.a q9e.pre.a
q6e.pre.a 1.0000000 0.4725968 0.4301648 0.3839258 0.4374356 0.6181036
q7a.pre.a 0.4725968 1.0000000 0.3572141 0.4003075 0.4225857 0.4806798
q8f.pre.a 0.4301648 0.3572141 1.0000000 0.2886259 0.3566590 0.5043949
q9a.pre.a 0.3839258 0.4003075 0.2886259 1.0000000 0.6812080 0.4039653
q9b.pre.a 0.4374356 0.4225857 0.3566590 0.6812080 1.0000000 0.4772119
q9e.pre.a 0.6181036 0.4806798 0.5043949 0.4039653 0.4772119 1.0000000
corrplot(cor(pre_only_M))For pre questions, for NAs
# Pre-survey competence items for respondents with missing gender.
pre_only_NA <- allYrsFinal |>
  filter(is.na(gender)) |>
  select(all_of(competenceFull[endsWith(competenceFull, "pre.a")])) |>
  drop_na()
cor(pre_only_NA) q6e.pre.a q7a.pre.a q8f.pre.a q9a.pre.a q9b.pre.a q9e.pre.a
q6e.pre.a 1.0000000 0.5852185 0.3654192 0.3483319 0.5205958 0.6564209
q7a.pre.a 0.5852185 1.0000000 0.2212318 0.4279232 0.4932185 0.6025653
q8f.pre.a 0.3654192 0.2212318 1.0000000 0.2081000 0.3183099 0.4659341
q9a.pre.a 0.3483319 0.4279232 0.2081000 1.0000000 0.6169784 0.5134717
q9b.pre.a 0.5205958 0.4932185 0.3183099 0.6169784 1.0000000 0.6528418
q9e.pre.a 0.6564209 0.6025653 0.4659341 0.5134717 0.6528418 1.0000000
corrplot(cor(pre_only_NA))For post questions, for women
# Post-survey competence items for women, complete cases only.
post_cols_F <- grep("post\\.a$", competenceFull, value = TRUE)
post_only_F <- allYrsFinal |>
  filter(gender == "Female") |>
  select(all_of(post_cols_F)) |>
  drop_na()
cor(post_only_F) q6e.post.a q7a.post.a q8f.post.a q9a.post.a q9b.post.a q9e.post.a
q6e.post.a 1.0000000 0.5630740 0.4195056 0.4363502 0.5021618 0.6828753
q7a.post.a 0.5630740 1.0000000 0.3963168 0.5889909 0.6303725 0.6265500
q8f.post.a 0.4195056 0.3963168 1.0000000 0.3038777 0.3883250 0.4635532
q9a.post.a 0.4363502 0.5889909 0.3038777 1.0000000 0.7556062 0.4806988
q9b.post.a 0.5021618 0.6303725 0.3883250 0.7556062 1.0000000 0.5709947
q9e.post.a 0.6828753 0.6265500 0.4635532 0.4806988 0.5709947 1.0000000
corrplot(cor(post_only_F))For post questions, for men
# Post-survey competence items for men, complete cases only.
post_only_M <- allYrsFinal |>
  filter(gender == "Male") |>
  select(all_of(competenceFull)) |>
  select(matches("post\\.a$")) |>
  drop_na()
cor(post_only_M) q6e.post.a q7a.post.a q8f.post.a q9a.post.a q9b.post.a q9e.post.a
q6e.post.a 1.0000000 0.5662949 0.4294258 0.4446943 0.4943005 0.6623038
q7a.post.a 0.5662949 1.0000000 0.3815573 0.5245169 0.5728703 0.6034080
q8f.post.a 0.4294258 0.3815573 1.0000000 0.2676293 0.3421776 0.4589553
q9a.post.a 0.4446943 0.5245169 0.2676293 1.0000000 0.7258889 0.4482372
q9b.post.a 0.4943005 0.5728703 0.3421776 0.7258889 1.0000000 0.5410897
q9e.post.a 0.6623038 0.6034080 0.4589553 0.4482372 0.5410897 1.0000000
corrplot(cor(post_only_M))For post questions, for NAs
# BUG FIX: this chunk is labeled "post questions, for NAs" but was an exact
# copy-paste of the Male chunk (it filtered gender == "Male" into post_only_M,
# and the printed correlation matrix that follows is identical to the Male one).
# Filter to respondents with missing gender and store under a distinct name;
# the cor()/corrplot() calls that follow should reference post_only_NA.
post_only_NA <- allYrsFinal |>
  filter(is.na(gender)) |>
  select(all_of(competenceFull)) |>
  select(ends_with("post.a")) |>
  drop_na()
cor(post_only_M) q6e.post.a q7a.post.a q8f.post.a q9a.post.a q9b.post.a q9e.post.a
q6e.post.a 1.0000000 0.5662949 0.4294258 0.4446943 0.4943005 0.6623038
q7a.post.a 0.5662949 1.0000000 0.3815573 0.5245169 0.5728703 0.6034080
q8f.post.a 0.4294258 0.3815573 1.0000000 0.2676293 0.3421776 0.4589553
q9a.post.a 0.4446943 0.5245169 0.2676293 1.0000000 0.7258889 0.4482372
q9b.post.a 0.4943005 0.5728703 0.3421776 0.7258889 1.0000000 0.5410897
q9e.post.a 0.6623038 0.6034080 0.4589553 0.4482372 0.5410897 1.0000000
corrplot(cor(post_only_M))calc_req_NA <- allYrsFinal|>
filter(math.prereq == "Calculus",
cognitive.competence.pre != 0,
cognitive.competence.post != 0)|>
mutate(gender = case_when(is.na(gender) ~ "NA/Other",
TRUE~gender), mastering.confidence.change = mastering.confidence.post - mastering.confidence.pre)
# Calculus-prerequisite subset: known gender, nonzero pre/post cognitive
# competence; add the change in mastering confidence (post minus pre).
calc_req <- allYrsFinal |>
  filter(math.prereq == "Calculus") |>
  filter(!is.na(gender)) |>
  filter(cognitive.competence.pre != 0,
         cognitive.competence.post != 0) |>
  mutate(mastering.confidence.change =
           mastering.confidence.post - mastering.confidence.pre)
# Mixed-effects model: cognitive competence change on student gender,
# instructor gender, and binned years teaching intro stats, with a random
# intercept and a random gender slope for each instructor.
calc.fit <- lme(cognitive.competence.change ~ gender+ instructor.gender + years.teaching.intro.stats.binned,
random = ~ 1+gender|instructor, data= calc_req)
# residuals vs. fitted values (check for constant variance)
plot(resid(calc.fit) ~ fitted(calc.fit), xlab = "Fitted", ylab="Residuals")
abline(h=0, lty = "dashed")
# normal Q-Q plot of residuals
qqnorm(resid(calc.fit), ylab="Residuals"); qqline(resid(calc.fit), lty = "dashed")
# histogram of residuals follows in the next chunk
hist(resid(calc.fit), main = '', col="olivedrab", xlab = "Residuals")summary(calc.fit) Linear mixed-effects model fit by REML
Data: calc_req
AIC BIC logLik
1075.435 1111.755 -528.7176
Random effects:
Formula: ~1 + gender | instructor
Structure: General positive-definite, Log-Cholesky parametrization
StdDev Corr
(Intercept) 0.3773246 (Intr)
genderMale 0.1897776 -0.2
Residual 0.8244677
Fixed effects: cognitive.competence.change ~ gender + instructor.gender + years.teaching.intro.stats.binned
Value Std.Error DF t-value
(Intercept) 0.18021904 0.2722960 413 0.6618498
genderMale -0.06048295 0.1399364 413 -0.4322175
instructor.genderMale -0.31608540 0.3156329 5 -1.0014337
years.teaching.intro.stats.binned10-20 -0.15129835 0.3107182 5 -0.4869312
years.teaching.intro.stats.binned5-10 0.03485842 0.1039343 413 0.3353888
p-value
(Intercept) 0.5084
genderMale 0.6658
instructor.genderMale 0.3626
years.teaching.intro.stats.binned10-20 0.6469
years.teaching.intro.stats.binned5-10 0.7375
Correlation:
(Intr) gndrMl inst.M y....1
genderMale -0.154
instructor.genderMale -0.686 -0.109
years.teaching.intro.stats.binned10-20 -0.352 0.000 -0.091
years.teaching.intro.stats.binned5-10 -0.048 0.009 -0.129 0.132
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-3.268132315 -0.597894311 0.008557559 0.628149508 2.998537004
Number of Observations: 423
Number of Groups: 8
ranef(calc.fit) # coeffs for each instructor section (Intercept) genderMale
Instructor177 -0.0562811602 0.08499587
Instructor182 0.0375606385 0.05362522
Instructor204 0.0007681933 -0.09347672
Instructor234 0.4673877482 0.06024862
Instructor40 0.2439284568 -0.03238776
Instructor73 -0.2027556280 0.02039623
Instructor95 -0.0378557422 -0.15187570
Instructor99 -0.4527525063 0.05847425
Comparing the random-effect standard deviations with and without instructor gender as a fixed effect:
With instructor gender:
* residual StdDev = 0.8244677
* genderMale StdDev = 0.1897776
Without instructor gender:
* residual StdDev = 0.8226201
* genderMale StdDev = 0.2226835
The two models explain a similar amount of variability; the model with the gender x instructor-gender term explains a bit more.
# Same model on the NA-inclusive data (calc_req_NA), dropping instructor
# gender as a fixed effect; random intercept and gender slope by instructor.
calc.fit2 <- lme(cognitive.competence.change ~ gender + years.teaching.intro.stats.binned,
                 random = ~ 1 + gender | instructor, data = calc_req_NA)
# residuals vs. fitted values
plot(resid(calc.fit2) ~ fitted(calc.fit2), xlab = "Fitted", ylab = "Residuals")
abline(h = 0, lty = "dashed")
# normal Q-Q plot of residuals
# BUG FIX: qqline() previously used resid(calc.fit) — residuals from the
# WRONG model — so the reference line did not match the qqnorm() points.
# The line must be drawn from the same residuals being plotted.
qqnorm(resid(calc.fit2), ylab = "Residuals"); qqline(resid(calc.fit2), lty = "dashed")
# histogram of residuals follows in the next chunk
hist(resid(calc.fit2), main = '', col="olivedrab", xlab = "Residuals")summary(calc.fit2)Linear mixed-effects model fit by REML
Data: calc_req_NA
AIC BIC logLik
1081.911 1130.365 -528.9553
Random effects:
Formula: ~1 + gender | instructor
Structure: General positive-definite, Log-Cholesky parametrization
StdDev Corr
(Intercept) 0.4071961 (Intr) gndrMl
genderMale 0.1846553 -0.399
genderNA/Other 6.2170483 -0.001 0.000
Residual 0.8242597
Fixed effects: cognitive.competence.change ~ gender + years.teaching.intro.stats.binned
Value Std.Error DF t-value
(Intercept) -0.00573151 0.207789 413 -0.0275833
genderMale -0.08778980 0.136839 413 -0.6415544
genderNA/Other -0.02810501 6.271800 413 -0.0044812
years.teaching.intro.stats.binned10-20 -0.16252515 0.314972 6 -0.5159983
years.teaching.intro.stats.binned5-10 0.02051101 0.103159 413 0.1988285
p-value
(Intercept) 0.9780
genderMale 0.5215
genderNA/Other 0.9964
years.teaching.intro.stats.binned10-20 0.6243
years.teaching.intro.stats.binned5-10 0.8425
Correlation:
(Intr) gndrMl gnNA/O y....1
genderMale -0.389
genderNA/Other 0.000 0.002
years.teaching.intro.stats.binned10-20 -0.563 0.009 0.000
years.teaching.intro.stats.binned5-10 -0.179 -0.015 -0.005 0.122
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-3.265842870 -0.612337175 -0.002866103 0.628048731 3.002408483
Number of Observations: 424
Number of Groups: 8
Turning some vars into factors for ggpredict
# Convert the categorical predictors to factors (needed for ggpredict);
# across() replaces four repeated `x = factor(x)` mutations.
calc_factor <- calc_req |>
  mutate(across(c(gender, instructor.gender,
                  years.teaching.intro.stats.binned, instructor),
                factor))
# calc.fit.2 random slopes graph
The lines look pretty parallel = the effect of gender on cognitive competence change is consistent across instructors
calc.fit <- lme(cognitive.competence.change ~ gender+ instructor.gender + years.teaching.intro.stats.binned,
random = ~ 1+gender|instructor, data= calc_factor)
calc_plot <- ggpredict(calc.fit,
terms = c("gender", "years.teaching.intro.stats.binned", "instructor [sample=8]"),
type = "re")
ggplot(calc_plot, aes(x=x, y=predicted, group = group, color = group))+
geom_line() +
labs(y= "Cognitive Competence Change", x="Female = 0, Male = 1", color = "Years Teaching Intro Stats")No significant interactions?
# Does the student-gender effect depend on instructor gender?
# Interaction model with a random intercept only, by instructor.
calc.fit_interaction <- lme(
  cognitive.competence.change ~ gender * instructor.gender +
    years.teaching.intro.stats.binned,
  random = ~ 1 | instructor,
  data = calc_req
)
summary(calc.fit_interaction)Linear mixed-effects model fit by REML
Data: calc_req
AIC BIC logLik
1073.433 1105.698 -528.7166
Random effects:
Formula: ~1 | instructor
(Intercept) Residual
StdDev: 0.4225361 0.8259412
Fixed effects: cognitive.competence.change ~ gender * instructor.gender + years.teaching.intro.stats.binned
Value Std.Error DF t-value
(Intercept) 0.3139906 0.3144654 412 0.9984900
genderMale -0.5078141 0.3563894 412 -1.4248856
instructor.genderMale -0.3952205 0.3677724 5 -1.0746333
years.teaching.intro.stats.binned10-20 -0.1393314 0.3447903 5 -0.4041046
years.teaching.intro.stats.binned5-10 0.0320615 0.1044988 412 0.3068122
genderMale:instructor.genderMale 0.3696232 0.3675957 412 1.0055158
p-value
(Intercept) 0.3186
genderMale 0.1549
instructor.genderMale 0.3316
years.teaching.intro.stats.binned10-20 0.7028
years.teaching.intro.stats.binned5-10 0.7591
genderMale:instructor.genderMale 0.3152
Correlation:
(Intr) gndrMl inst.M y....1 y....5
genderMale -0.336
instructor.genderMale -0.733 0.304
years.teaching.intro.stats.binned10-20 -0.324 -0.045 -0.100
years.teaching.intro.stats.binned5-10 -0.038 -0.005 -0.116 0.119
genderMale:instructor.genderMale 0.324 -0.970 -0.330 0.047 0.014
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-3.2640557460 -0.5758305777 -0.0009392462 0.6432488302 3.0259104671
Number of Observations: 423
Number of Groups: 8
0-5 10-20 5-10
145 58 220
[1] "Instructor40_Section1_Fall_16-17" "Instructor73_Section1_Spring_16-17"
[3] "Instructor95_Section1_Fall_16-17" "Instructor99_Section1_Spring_16-17"
[5] "Instructor95_Section1_Fall_15-16" "Instructor177_Section1_Fall_15-16"
[7] "Instructor177_Section2_Fall_15-16" "Instructor182_Section1_Fall_15-16"
[9] "Instructor204_Section1_Spring_14-15" "Instructor95_Section1_Fall_14-15"
[11] "Instructor234_Section1_Spring_14-15"
# Pre- and post-survey versions of the six cognitive-competence items.
competence_qs <- c("q6e", "q7a", "q8f", "q9a", "q9b", "q9e")
competencePre <- paste0(competence_qs, ".pre.a")
competencePost <- paste0(competence_qs, ".post.a")
calc_req|> # checking for NAs and 0s
select(all_of(competencePre), cognitive.competence.pre)# A tibble: 423 × 7
q6e.pre.a q7a.pre.a q8f.pre.a q9a.pre.a q9b.pre.a q9e.pre.a
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 4 5 4 5 5 5
2 3 7 4 7 7 5
3 6 5 5 6 6 NA
4 6 6 5 6 6 6
5 5 6 4 6 6 5
6 5 5 5 6 6 5
7 4 3 5 6 5 5
8 5 6 4 6 6 4
9 6 5 3 6 6 2
10 5 6 4 5 5 5
# ℹ 413 more rows
# ℹ 1 more variable: cognitive.competence.pre <dbl>
table(is.na(calc_req$q7a.pre.a))
FALSE TRUE
422 1
table(calc_req$cognitive.competence.pre == 0) # get rid of (then check for post - make sure that they only get a 0 on cognitive.competence.pre/post if they didn't fill out any)
FALSE
423
calc_req|>
filter(is.na(q7a.pre.a), cognitive.competence.pre != 0)|>
select(all_of(competencePre), cognitive.competence.pre)# A tibble: 1 × 7
q6e.pre.a q7a.pre.a q8f.pre.a q9a.pre.a q9b.pre.a q9e.pre.a
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 NA NA 4 5 5 4
# ℹ 1 more variable: cognitive.competence.pre <dbl>
calc_req|>
filter(cognitive.competence.pre == 0) # none# A tibble: 0 × 298
# ℹ 298 variables: instructor <chr>, section <chr>, instructor.section <chr>,
# opt.out.pre <dbl>, opt.out.post <dbl>, test.time.pre <dbl>,
# test.time.post <dbl>, q16a.pre.c.1415 <chr>, q16b.pre.c.1415 <chr>,
# q16c.pre.c.1415 <chr>, q16a.pre.c <chr>, q16b.pre.c <chr>,
# q16c.pre.c <chr>, q16d.pre.c <chr>, q16.pre.c <dbl>, q17.pre.c <chr>,
# q18.pre.c <chr>, q19.pre.c <chr>, q20.pre.c <chr>, q21.pre.c <chr>,
# q22.pre.c <chr>, q23.pre.c <chr>, q24.pre.c <chr>, q25.pre.c <chr>, …
calc_req|>
filter(cognitive.competence.post == 0) # none# A tibble: 0 × 298
# ℹ 298 variables: instructor <chr>, section <chr>, instructor.section <chr>,
# opt.out.pre <dbl>, opt.out.post <dbl>, test.time.pre <dbl>,
# test.time.post <dbl>, q16a.pre.c.1415 <chr>, q16b.pre.c.1415 <chr>,
# q16c.pre.c.1415 <chr>, q16a.pre.c <chr>, q16b.pre.c <chr>,
# q16c.pre.c <chr>, q16d.pre.c <chr>, q16.pre.c <dbl>, q17.pre.c <chr>,
# q18.pre.c <chr>, q19.pre.c <chr>, q20.pre.c <chr>, q21.pre.c <chr>,
# q22.pre.c <chr>, q23.pre.c <chr>, q24.pre.c <chr>, q25.pre.c <chr>, …
table(is.na(calc_req$q7a.post.a))
FALSE TRUE
421 2
table(calc_req$cognitive.competence.post == 0) # get rid of (then check for post - make sure that they only get a 0 on cognitive.competence.pre/post if they didn't fill out any)
FALSE
423
# we can remove cognitive.competencepre/post from dataset
calc_req|>
filter(is.na(q7a.post.a), cognitive.competence.post != 0)|>
select(all_of(competencePost), cognitive.competence.post)# A tibble: 2 × 7
q6e.post.a q7a.post.a q8f.post.a q9a.post.a q9b.post.a q9e.post.a
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 4 NA 4 3 2 5
2 5 NA 5 6 6 6
# ℹ 1 more variable: cognitive.competence.post <dbl>
length(unique(calc_req$instructor.section))[1] 11
boxplot(resid(calc.fit) ~ calc_req$carnegie.classification, xlab = "instructor section", ylab="Residuals")length(resid(calc.fit))[1] 423
length(calc_req$instructor.section)[1] 423
nrow(calc_req)[1] 423
iscamsummary(resid(calc.fit))missing n Min Q1 Median Q3 Max Mean SD
0 423 -2.694 -0.493 0.007 0.518 2.472 0 0.815
iscamsummary(calc_req$cognitive.competence.change)missing n Min Q1 Median Q3 Max Mean SD
0 423 -3 -0.667 -0.167 0.333 2.333 -0.184 0.851
table(calc_req$gender)
Female Male
258 165
table(calc_req$instructor.gender)
Female Male
51 372
table(calc_req$years.teaching.experience.binned)
0-5 10-20 5-10
31 65 208
View(calc_req$years.teaching.experience.binned)calc_req|>
ggplot(aes(x=cognitive.competence.pre, y = cognitive.competence.post)) +
geom_point()calc_req|>
filter(cognitive.competence.pre == 0)# A tibble: 0 × 298
# ℹ 298 variables: instructor <chr>, section <chr>, instructor.section <chr>,
# opt.out.pre <dbl>, opt.out.post <dbl>, test.time.pre <dbl>,
# test.time.post <dbl>, q16a.pre.c.1415 <chr>, q16b.pre.c.1415 <chr>,
# q16c.pre.c.1415 <chr>, q16a.pre.c <chr>, q16b.pre.c <chr>,
# q16c.pre.c <chr>, q16d.pre.c <chr>, q16.pre.c <dbl>, q17.pre.c <chr>,
# q18.pre.c <chr>, q19.pre.c <chr>, q20.pre.c <chr>, q21.pre.c <chr>,
# q22.pre.c <chr>, q23.pre.c <chr>, q24.pre.c <chr>, q25.pre.c <chr>, …
calc_req|>
filter(is.na(years.teaching.experience.binned),
instructor == "Instructor204")|>
select(institution, textbook.used, textbook.classification, carnegie.classification, instructor.section, years.teaching.intro.stats)# A tibble: 10 × 6
institution textbook.used textbook.classification carnegie.classification
<dbl> <chr> <chr> <chr>
1 108 ISCAM ISI Baccalaureate College
2 108 ISCAM ISI Baccalaureate College
3 108 ISCAM ISI Baccalaureate College
4 108 ISCAM ISI Baccalaureate College
5 108 ISCAM ISI Baccalaureate College
6 108 ISCAM ISI Baccalaureate College
7 108 ISCAM ISI Baccalaureate College
8 108 ISCAM ISI Baccalaureate College
9 108 ISCAM ISI Baccalaureate College
10 108 ISCAM ISI Baccalaureate College
# ℹ 2 more variables: instructor.section <chr>,
# years.teaching.intro.stats <dbl>
Instructors that look worse for women and better for men:
Instructors that look worse for men and better for women * Instructor 40 (Female) * Instructor 204 (Female) * Instructor 95
Instructor177 had a baseline cognitive competence change of -0.44. Instructors 204, 234, and 40 were significantly different from the reference (204 was borderline significant), and the genderMale:Instructor204 interaction was borderline significant.
# reference instructor: 177
# Fixed-effects-only comparison: treat instructor as a fixed factor and fit
# the gender x instructor interaction with plain lm() (reference: Instructor177).
calc_fit_reg <- lm(
  cognitive.competence.change ~ gender * instructor +
    years.teaching.intro.stats.binned,
  data = calc_req
)
summary(calc_fit_reg)
Call:
lm(formula = cognitive.competence.change ~ gender * instructor +
years.teaching.intro.stats.binned, data = calc_req)
Residuals:
Min 1Q Median 3Q Max
-2.69950 -0.53191 0.02778 0.56060 2.46900
Coefficients: (2 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.447917 0.206071 -2.174 0.03031 *
genderMale 0.211075 0.279688 0.755 0.45088
instructorInstructor182 0.059028 0.394596 0.150 0.88116
instructorInstructor204 0.753472 0.394596 1.909 0.05690 .
instructorInstructor234 0.883083 0.435446 2.028 0.04321 *
instructorInstructor40 1.197917 0.460789 2.600 0.00967 **
instructorInstructor73 0.394153 0.253738 1.553 0.12111
instructorInstructor95 0.314080 0.227560 1.380 0.16828
instructorInstructor99 -0.618750 0.422320 -1.465 0.14366
years.teaching.intro.stats.binned10-20 NA NA NA NA
years.teaching.intro.stats.binned5-10 -0.001833 0.106108 -0.017 0.98622
genderMale:instructorInstructor182 0.082576 0.537150 0.154 0.87790
genderMale:instructorInstructor204 -1.099963 0.601105 -1.830 0.06800 .
genderMale:instructorInstructor234 -0.154212 0.504065 -0.306 0.75981
genderMale:instructorInstructor40 -0.655519 0.601105 -1.091 0.27613
genderMale:instructorInstructor73 NA NA NA NA
genderMale:instructorInstructor95 -0.469343 0.297841 -1.576 0.11584
genderMale:instructorInstructor99 0.286148 0.520322 0.550 0.58266
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.8243 on 407 degrees of freedom
Multiple R-squared: 0.09533, Adjusted R-squared: 0.06198
F-statistic: 2.859 on 15 and 407 DF, p-value: 0.0002762
# One color per instructor / per instructor gender for the interaction plots.
color_instr <- rainbow(n_distinct(calc_req$instructor))
color_instr_gender <- rainbow(n_distinct(calc_req$instructor.gender))
interaction.plot(x.factor = calc_req$gender, trace.factor = calc_req$instructor, response = calc_req$cognitive.competence.change,
xlab="Gender",
ylab="Cognitive Competence Change",
trace.label="Instructor",
col = color_instr)interaction.plot(x.factor = calc_req$gender, trace.factor = calc_req$instructor.gender, response = calc_req$cognitive.competence.change,
xlab="Gender",
ylab="Cognitive Competence Change",
trace.label="Instructor Gender", col=color_instr_gender)calc_req|>
select(instructor, instructor.gender)|>
distinct()# A tibble: 8 × 2
instructor instructor.gender
<chr> <chr>
1 Instructor40 Female
2 Instructor73 Female
3 Instructor95 Male
4 Instructor99 Male
5 Instructor177 Male
6 Instructor182 Male
7 Instructor204 Female
8 Instructor234 Male
Significant instructors (significantly diff from 177)
calc_req|>
filter(instructor=="Instructor204" |
instructor == "Instructor234" |
instructor == "Instructor40")|>
group_by(instructor, instructor.gender)|>
summarize(mean_cog_comp_change = mean(cognitive.competence.change, na.rm = TRUE),
mean_ach_gain = mean(ach.gain.24, na.rm = TRUE), instructor.gender,
.groups='drop')|>
distinct()# A tibble: 3 × 4
instructor instructor.gender mean_cog_comp_change mean_ach_gain
<chr> <chr> <dbl> <dbl>
1 Instructor204 Female -0.0500 0.363
2 Instructor234 Male 0.477 0.403
3 Instructor40 Female 0.483 0.101
calc_req|>
ggplot(aes(x=mastering.confidence.pre, y = mastering.confidence.post)) +
geom_point()plot(mastering.confidence.pre ~ mastering.confidence.post, data= calc_req)calc_req|>
filter(is.na(mastering.confidence.change))# A tibble: 1 × 298
instructor section instructor.section opt.out.pre opt.out.post test.time.pre
<chr> <chr> <chr> <dbl> <dbl> <dbl>
1 Instructor95 1 Instructor95_Sect… 1 1 45
# ℹ 292 more variables: test.time.post <dbl>, q16a.pre.c.1415 <chr>,
# q16b.pre.c.1415 <chr>, q16c.pre.c.1415 <chr>, q16a.pre.c <chr>,
# q16b.pre.c <chr>, q16c.pre.c <chr>, q16d.pre.c <chr>, q16.pre.c <dbl>,
# q17.pre.c <chr>, q18.pre.c <chr>, q19.pre.c <chr>, q20.pre.c <chr>,
# q21.pre.c <chr>, q22.pre.c <chr>, q23.pre.c <chr>, q24.pre.c <chr>,
# q25.pre.c <chr>, q26.pre.c <chr>, q27.pre.c <chr>, q28.pre.c <chr>,
# q29.pre.c <chr>, q30.pre.c <chr>, q31.pre.c <chr>, q32.pre.c <chr>, …
calc_req|>
filter(mastering.confidence.pre ==0 | mastering.confidence.post == 0)# A tibble: 0 × 298
# ℹ 298 variables: instructor <chr>, section <chr>, instructor.section <chr>,
# opt.out.pre <dbl>, opt.out.post <dbl>, test.time.pre <dbl>,
# test.time.post <dbl>, q16a.pre.c.1415 <chr>, q16b.pre.c.1415 <chr>,
# q16c.pre.c.1415 <chr>, q16a.pre.c <chr>, q16b.pre.c <chr>,
# q16c.pre.c <chr>, q16d.pre.c <chr>, q16.pre.c <dbl>, q17.pre.c <chr>,
# q18.pre.c <chr>, q19.pre.c <chr>, q20.pre.c <chr>, q21.pre.c <chr>,
# q22.pre.c <chr>, q23.pre.c <chr>, q24.pre.c <chr>, q25.pre.c <chr>, …
lme notes: fixed effects are coefficients that are constant across individuals or entities; random effects are coefficients that vary among individuals or entities.
How cog comp change is influenced by gender, years teaching intro stats, and mastering confidence change. Random effects structure allows the intercept and slope for gender to vary by instructor.
Random effects
* Intercept for instructor: StdDev very close to zero -> very little variability in the intercept across different instructors
genderMale: StdDev = 0.166 -> some variability in the effect of being male across different instructors
Residual: StdDev = 0.708 -> within group variation (how to better interpret?)
Fixed Effects * Intercept: Intercept is not significantly different from 0
genderMale: gender does not have a significant effect on cog comp change
years.teaching.intro.stats does not have a significant effect on cog comp change
mastering.confidence.change: coeff = 0.3547 and significantly affects cog comp change
# Add mastering.confidence.change as a covariate; random intercept and random
# gender slope by instructor. na.action=na.omit drops the one row with a
# missing mastering confidence change (422 observations instead of 423).
calc.fit3 <- lme(cognitive.competence.change ~ gender + years.teaching.intro.stats.binned + mastering.confidence.change,
random = ~ 1+gender|instructor, data= calc_req, na.action=na.omit)
# residuals vs. fitted values
plot(resid(calc.fit3) ~ fitted(calc.fit3), xlab = "Fitted", ylab="Residuals")
abline(h=0, lty = "dashed")
# normal Q-Q plot of residuals
qqnorm(resid(calc.fit3), ylab="Residuals"); qqline(resid(calc.fit3), lty = "dashed")
# histogram of residuals follows in the next chunk
hist(resid(calc.fit3), main = '', col="olivedrab", xlab = "Residuals")summary(calc.fit3)Linear mixed-effects model fit by REML
Data: calc_req
AIC BIC logLik
942.6339 978.9317 -462.317
Random effects:
Formula: ~1 + gender | instructor
Structure: General positive-definite, Log-Cholesky parametrization
StdDev Corr
(Intercept) 0.00006540548 (Intr)
genderMale 0.16648662701 0.071
Residual 0.70849385189
Fixed effects: cognitive.competence.change ~ gender + years.teaching.intro.stats.binned + mastering.confidence.change
Value Std.Error DF t-value
(Intercept) 0.0277877 0.06611520 411 0.420292
genderMale -0.0134283 0.10965600 411 -0.122458
years.teaching.intro.stats.binned10-20 -0.1231203 0.12508382 6 -0.984302
years.teaching.intro.stats.binned5-10 0.0397746 0.07785070 411 0.510909
mastering.confidence.change 0.3547183 0.02708736 411 13.095342
p-value
(Intercept) 0.6745
genderMale 0.9026
years.teaching.intro.stats.binned10-20 0.3630
years.teaching.intro.stats.binned5-10 0.6097
mastering.confidence.change 0.0000
Correlation:
(Intr) gndrMl y....1 y....5
genderMale -0.246
years.teaching.intro.stats.binned10-20 -0.426 -0.209
years.teaching.intro.stats.binned5-10 -0.693 0.030 0.357
mastering.confidence.change 0.191 -0.009 0.004 0.005
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-3.0369771 -0.5960189 0.0232673 0.6103688 3.6986830
Number of Observations: 422
Number of Groups: 8
Interaction plot : why just one line?
# Refit calc.fit3 on the factor version of the data (calc_factor), since the
# categorical variables were converted to factors for ggpredict (see above).
calc.fit3_fct <- lme(cognitive.competence.change ~ gender + years.teaching.intro.stats.binned + mastering.confidence.change,
random = ~ 1+gender|instructor, data= calc_factor, na.action=na.omit)
# Random-effect ("re") predictions by gender for each of the 8 instructors.
plot_calc_3 <- ggpredict(calc.fit3_fct, terms = c("gender", "instructor [sample=8]"), type = "re")
ggplot(plot_calc_3, aes(x=x, y =predicted, group = group, color = group)) +
geom_line(alpha = 0.5) +
labs(y= "Cognitive Competence Change", x= "Female = 0, Male = 1", color = "instructor")genderMale*mastering.confidence.change not significant
# Does the mastering-confidence effect differ by gender? Add the
# gender * mastering.confidence.change interaction (not significant:
# interaction p = 0.74 in the summary below).
calc.fit3_interaction <- lme(cognitive.competence.change ~ gender*mastering.confidence.change + years.teaching.intro.stats.binned,
random = ~ 1+gender|instructor, data= calc_req, na.action=na.omit)
# Diagnostic plots (commented out; same checks as for calc.fit3):
# plot(resid(calc.fit3_interaction) ~ fitted(calc.fit3_interaction), xlab = "Fitted", ylab="Residuals")
# abline(h=0, lty = "dashed")
# #normal Q-Q plot of residuals
# qqnorm(resid(calc.fit3_interaction), ylab="Residuals"); qqline(resid(calc.fit3_interaction), lty = "dashed")
# #histogram of residuals
# hist(resid(calc.fit3_interaction), main = '', col="olivedrab", xlab = "Residuals")
summary(calc.fit3_interaction)Linear mixed-effects model fit by REML
Data: calc_req
AIC BIC logLik
948.4651 988.772 -464.2326
Random effects:
Formula: ~1 + gender | instructor
Structure: General positive-definite, Log-Cholesky parametrization
StdDev Corr
(Intercept) 0.00002614325 (Intr)
genderMale 0.16404158301 0.062
Residual 0.70931187429
Fixed effects: cognitive.competence.change ~ gender * mastering.confidence.change + years.teaching.intro.stats.binned
Value Std.Error DF t-value
(Intercept) 0.0246704 0.06680131 410 0.369310
genderMale -0.0057739 0.11188205 410 -0.051607
mastering.confidence.change 0.3476056 0.03468662 410 10.021316
years.teaching.intro.stats.binned10-20 -0.1224038 0.12497709 6 -0.979410
years.teaching.intro.stats.binned5-10 0.0391039 0.07793732 410 0.501736
genderMale:mastering.confidence.change 0.0184800 0.05564323 410 0.332115
p-value
(Intercept) 0.7121
genderMale 0.9589
mastering.confidence.change 0.0000
years.teaching.intro.stats.binned10-20 0.3652
years.teaching.intro.stats.binned5-10 0.6161
genderMale:mastering.confidence.change 0.7400
Correlation:
(Intr) gndrMl mstr.. y....1 y....5
genderMale -0.269
mastering.confidence.change 0.233 -0.147
years.teaching.intro.stats.binned10-20 -0.422 -0.204 0.005
years.teaching.intro.stats.binned5-10 -0.682 0.022 0.021 0.357
genderMale:mastering.confidence.change -0.137 0.225 -0.624 -0.002 -0.028
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-3.04913940 -0.57996809 0.01516545 0.61500080 3.68972580
Number of Observations: 422
Number of Groups: 8
Without gender as a random effect, gender is significant => at least 1 instructor has a different gender effect
# Same fixed effects as calc.fit3, but with only a random intercept by
# instructor (no random gender slope).
calc.fit3_noRandGender <- lme(
  cognitive.competence.change ~
    gender + years.teaching.intro.stats.binned + mastering.confidence.change,
  random = ~ 1 | instructor,
  data = calc_req,
  na.action = na.omit
)
summary(calc.fit3_noRandGender)Linear mixed-effects model fit by REML
Data: calc_req
AIC BIC logLik
940.604 968.8356 -463.302
Random effects:
Formula: ~1 | instructor
(Intercept) Residual
StdDev: 0.1103604 0.7105646
Fixed effects: cognitive.competence.change ~ gender + years.teaching.intro.stats.binned + mastering.confidence.change
Value Std.Error DF t-value
(Intercept) 0.0837677 0.08988791 411 0.931913
genderMale -0.1178220 0.07291363 411 -1.615912
years.teaching.intro.stats.binned10-20 -0.0946135 0.14424061 6 -0.655942
years.teaching.intro.stats.binned5-10 0.0489552 0.08252204 411 0.593238
mastering.confidence.change 0.3529208 0.02719360 411 12.978082
p-value
(Intercept) 0.3519
genderMale 0.1069
years.teaching.intro.stats.binned10-20 0.5362
years.teaching.intro.stats.binned5-10 0.5533
mastering.confidence.change 0.0000
Correlation:
(Intr) gndrMl y....1 y....5
genderMale -0.330
years.teaching.intro.stats.binned10-20 -0.528 -0.050
years.teaching.intro.stats.binned5-10 -0.408 -0.016 0.257
mastering.confidence.change 0.113 0.029 0.015 -0.016
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-2.98810623 -0.59003822 0.04044726 0.61318375 3.71753447
Number of Observations: 422
Number of Groups: 8
Interaction plot (why one line?)
# Interaction model with random gender slopes, fit on the factor data so
# ggpredict() can be used.
calc.fit3_RandGender_int <- lme(cognitive.competence.change ~ years.teaching.intro.stats.binned + mastering.confidence.change*gender,
random = ~ 1+gender|instructor, data= calc_factor, na.action=na.omit)
# Random-effect ("re") predictions by gender for each of the 8 instructors.
plot_confidence_interaction3 <- ggpredict(calc.fit3_RandGender_int, terms = c("gender", "instructor [sample=8]"), type = "re")
ggplot(plot_confidence_interaction3, aes(x=x, y = predicted, group = group, color = group)) +
geom_line(alpha = 0.5) +
labs(y="Cognitive Competence Change", x= "Female = 0, Male = 1", color = "instructor")calc.fit3_noRandGender_int <- lme(cognitive.competence.change ~ years.teaching.intro.stats.binned + mastering.confidence.change*gender,
random = ~ 1|instructor, data= calc_req, na.action=na.omit)
summary(calc.fit3_noRandGender_int)Linear mixed-effects model fit by REML
Data: calc_req
AIC BIC logLik
946.3881 978.6336 -465.1941
Random effects:
Formula: ~1 | instructor
(Intercept) Residual
StdDev: 0.1051965 0.7114321
Fixed effects: cognitive.competence.change ~ years.teaching.intro.stats.binned + mastering.confidence.change * gender
Value Std.Error DF t-value
(Intercept) 0.0764787 0.08897967 410 0.859507
years.teaching.intro.stats.binned10-20 -0.0928692 0.14192040 6 -0.654375
years.teaching.intro.stats.binned5-10 0.0479935 0.08234823 410 0.582811
mastering.confidence.change 0.3441004 0.03490555 410 9.858040
genderMale -0.1045880 0.07857583 410 -1.331046
mastering.confidence.change:genderMale 0.0232753 0.05515420 410 0.422005
p-value
(Intercept) 0.3906
years.teaching.intro.stats.binned10-20 0.5371
years.teaching.intro.stats.binned5-10 0.5603
mastering.confidence.change 0.0000
genderMale 0.1839
mastering.confidence.change:genderMale 0.6732
Correlation:
(Intr) y....1 y....5 mstr.. gndrMl
years.teaching.intro.stats.binned10-20 -0.520
years.teaching.intro.stats.binned5-10 -0.411 0.264
mastering.confidence.change 0.164 0.015 0.008
genderMale -0.351 -0.051 -0.029 -0.213
mastering.confidence.change:genderMale -0.118 -0.006 -0.035 -0.627 0.373
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-3.00492939 -0.60936531 0.03300359 0.62403105 3.70628393
Number of Observations: 422
Number of Groups: 8
Loop through other attitudes * all look significant * no significant interactions * Affect and Interest have the “most” significant interactions
# Fit one gender-interaction mixed model per attitude-change variable and
# store each model summary in a list keyed by the variable name.
other_attitudes_change <- c("affect.change", "difficulty.change", "effort.change", "interest.change", "value.change")
summaries <- list()
for (change in other_attitudes_change){
# Build the fixed-effects formula dynamically. The local is named `fml`
# (not `formula`) so it does not mask stats::formula().
fml <- as.formula(paste0("cognitive.competence.change ~ gender * ", change))
calc.fit.loop <- lme(fml, random = ~ 1| instructor, data = calc_req, na.action = na.omit)
summaries[[change]] <- summary(calc.fit.loop)
}
# Print the stored summary for each attitude-change model, in fit order
for (change in other_attitudes_change) {
print(summaries[[change]])
}Linear mixed-effects model fit by REML
Data: calc_req
AIC BIC logLik
824.1318 848.359 -406.0659
Random effects:
Formula: ~1 | instructor
(Intercept) Residual
StdDev: 0.08192137 0.6198973
Fixed effects: list(formula)
Value Std.Error DF t-value p-value
(Intercept) -0.0330696 0.05844999 412 -0.565776 0.5719
genderMale -0.0871045 0.06459843 412 -1.348400 0.1783
affect.change 0.6442851 0.04132587 412 15.590357 0.0000
genderMale:affect.change -0.0999716 0.06499968 412 -1.538033 0.1248
Correlation:
(Intr) gndrMl affct.
genderMale -0.475
affect.change 0.063 -0.078
genderMale:affect.change -0.064 0.205 -0.629
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-2.9862456 -0.6266261 0.1209534 0.6497055 3.0009020
Number of Observations: 423
Number of Groups: 8
Linear mixed-effects model fit by REML
Data: calc_req
AIC BIC logLik
1001.734 1025.961 -494.8671
Random effects:
Formula: ~1 | instructor
(Intercept) Residual
StdDev: 0.2789162 0.7609141
Fixed effects: list(formula)
Value Std.Error DF t-value p-value
(Intercept) -0.1287321 0.12217457 412 -1.053674 0.2926
genderMale -0.1033518 0.07995907 412 -1.292559 0.1969
difficulty.change 0.5345103 0.07048952 412 7.582834 0.0000
genderMale:difficulty.change -0.1043797 0.11365716 412 -0.918373 0.3590
Correlation:
(Intr) gndrMl dffcl.
genderMale -0.318
difficulty.change -0.095 0.096
genderMale:difficulty.change 0.030 -0.055 -0.605
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-3.62432245 -0.60982704 0.01414437 0.66459365 2.93700352
Number of Observations: 423
Number of Groups: 8
Linear mixed-effects model fit by REML
Data: calc_req
AIC BIC logLik
1061.193 1085.42 -524.5965
Random effects:
Formula: ~1 | instructor
(Intercept) Residual
StdDev: 0.3205431 0.8149323
Fixed effects: list(formula)
Value Std.Error DF t-value p-value
(Intercept) 0.08087320 0.14408229 412 0.5612987 0.5749
genderMale -0.10833797 0.11722934 412 -0.9241541 0.3559
effort.change 0.13457046 0.05083584 412 2.6471574 0.0084
genderMale:effort.change 0.07015743 0.09188282 412 0.7635533 0.4456
Correlation:
(Intr) gndrMl effrt.
genderMale -0.354
effort.change 0.311 -0.390
genderMale:effort.change -0.212 0.685 -0.551
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-3.38751981 -0.62868174 0.02309865 0.68027605 2.99375762
Number of Observations: 423
Number of Groups: 8
Linear mixed-effects model fit by REML
Data: calc_req
AIC BIC logLik
1025.402 1049.63 -506.7012
Random effects:
Formula: ~1 | instructor
(Intercept) Residual
StdDev: 0.2897189 0.7814186
Fixed effects: list(formula)
Value Std.Error DF t-value p-value
(Intercept) 0.12449583 0.12937907 412 0.962256 0.3365
genderMale -0.09332598 0.09270482 412 -1.006700 0.3147
interest.change 0.25009316 0.04980093 412 5.021857 0.0000
genderMale:interest.change 0.12936340 0.08683203 412 1.489812 0.1370
Correlation:
(Intr) gndrMl intrs.
genderMale -0.329
interest.change 0.233 -0.262
genderMale:interest.change -0.139 0.471 -0.566
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-3.62657610 -0.64211979 0.02605822 0.66592010 2.79926989
Number of Observations: 423
Number of Groups: 8
Linear mixed-effects model fit by REML
Data: calc_req
AIC BIC logLik
972.6517 996.879 -480.3259
Random effects:
Formula: ~1 | instructor
(Intercept) Residual
StdDev: 0.3001982 0.7337543
Fixed effects: list(formula)
Value Std.Error DF t-value p-value
(Intercept) 0.0519067 0.12712010 412 0.408328 0.6832
genderMale -0.1404155 0.07870246 412 -1.784131 0.0751
value.change 0.5388321 0.05914037 412 9.111071 0.0000
genderMale:value.change -0.0336186 0.10686670 412 -0.314585 0.7532
Correlation:
(Intr) gndrMl vl.chn
genderMale -0.296
value.change 0.080 -0.087
genderMale:value.change -0.071 0.209 -0.550
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-3.77169565 -0.58074059 -0.01509698 0.67853073 2.33993508
Number of Observations: 423
Number of Groups: 8
Random effects:
Intercept: StdDev = 0.2181 -> some variability in cog comp change can be attributed to differences between instructors
Residuals: StdDev = 0.8274 -> variation in cog comp change within instructors not explained by fixed effects
Fixed effects:
Intercept: baseline level of cog comp change for reference group (females with instructors with less than 5 years of teaching) -> not significant
genderMale: coeff = -0.1882, p = 0.0305 -> being male has a significant negative effect on cog comp change
years teaching intro stats (10-20) does not have a significant effect on cog comp change compared with those with 0-5 years experience
years teaching intro stats (5-10) has a significant positive effect on cog comp change compared with those with 0-5 years experience
NotSBI, NotSBI2, and Other aren’t different from ISI but OtherSBI is ???
There are gender differences for genders using the same textbook
# Model 5: gender + teaching-experience bin + textbook classification,
# random intercept per instructor. Uses calc_req_NA, where missing gender is
# kept as its own "NA/Other" level (see the genderNA/Other coefficient below).
calc.fit5 <- lme(cognitive.competence.change ~ gender + years.teaching.intro.stats.binned + textbook.classification,
random = ~ 1|instructor, data= calc_req_NA, na.action=na.omit)
summary(calc.fit5)Linear mixed-effects model fit by REML
Data: calc_req_NA
AIC BIC logLik
1073.381 1117.692 -525.6903
Random effects:
Formula: ~1 | instructor
(Intercept) Residual
StdDev: 0.2408299 0.8265278
Fixed effects: cognitive.competence.change ~ gender + years.teaching.intro.stats.binned + textbook.classification
Value Std.Error DF t-value
(Intercept) -0.1604085 0.2587134 412 -0.6200238
genderMale -0.1882161 0.0866918 412 -2.1710936
genderNA/Other -0.0072920 0.8291315 412 -0.0087947
years.teaching.intro.stats.binned10-20 0.0267763 0.3376069 3 0.0793121
years.teaching.intro.stats.binned5-10 0.7831209 0.3950516 412 1.9823254
textbook.classificationNotSBI -0.1309499 0.3316226 3 -0.3948763
textbook.classificationNotSBI2 0.7566714 0.4387838 3 1.7244744
textbook.classificationOther 0.0042100 0.3982526 3 0.0105713
textbook.classificationOtherSBI -0.7820871 0.3897670 412 -2.0065501
p-value
(Intercept) 0.5356
genderMale 0.0305
genderNA/Other 0.9930
years.teaching.intro.stats.binned10-20 0.9418
years.teaching.intro.stats.binned5-10 0.0481
textbook.classificationNotSBI 0.7193
textbook.classificationNotSBI2 0.1831
textbook.classificationOther 0.9922
textbook.classificationOtherSBI 0.0455
Correlation:
(Intr) gndrMl gnNA/O y....1 y....5
genderMale -0.127
genderNA/Other -0.004 0.035
years.teaching.intro.stats.binned10-20 -0.750 -0.028 -0.001
years.teaching.intro.stats.binned5-10 -0.633 -0.086 -0.003 0.496
textbook.classificationNotSBI -0.770 0.016 0.001 0.588 0.501
textbook.classificationNotSBI2 -0.575 -0.044 -0.002 0.446 0.384
textbook.classificationOther 0.001 -0.011 0.000 -0.357 0.001
textbook.classificationOtherSBI 0.563 0.097 -0.007 -0.443 -0.963
tx.NSBI t.NSBI2 txtb.O
genderMale
genderNA/Other
years.teaching.intro.stats.binned10-20
years.teaching.intro.stats.binned5-10
textbook.classificationNotSBI
textbook.classificationNotSBI2 0.452
textbook.classificationOther 0.000 0.000
textbook.classificationOtherSBI -0.447 -0.343 -0.001
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-3.235170712 -0.612511978 -0.007571671 0.623440822 3.015879131
Number of Observations: 424
Number of Groups: 8
Random slopes (textbook classification)
# Refit model 5 on the factor-coded data so ggpredict can compute
# random-effect predictions per instructor. (Removed a stray "+ +" typo —
# a no-op unary plus — from the original formula.)
calc.fit5_fct <- lme(cognitive.competence.change ~ gender + textbook.classification + years.teaching.intro.stats.binned,
random = ~ 1|instructor, data= calc_factor, na.action=na.omit)
# Predictions by gender and textbook for 8 sampled instructors
calc5_plot <- ggpredict(calc.fit5_fct,
terms = c("gender", "textbook.classification", "instructor [sample=8]"),
type = "re")
ggplot(calc5_plot, aes(x=x, y=predicted, group = group, color = group))+
geom_line() +
labs(y= "Cognitive Competence Change", x="Female = 0, Male = 1", color = "textbook classification")Investigate “imbalance” of gender across textbooks Proportions of males for each textbook classification
unique(calc_req_NA$textbook.classification)[1] "NotSBI2" "NotSBI" "OtherSBI" "ISI" "Other"
# Gender balance per textbook classification: total rows, count of males,
# and the resulting proportion. (Gender NAs were recoded to "NA/Other"
# upstream, so no na.rm is needed here.)
calc_req_NA |>
  group_by(textbook.classification) |>
  summarize(
    total = n(),
    num_males = sum(gender == "Male"),
    prop_males = num_males / total
  ) # A tibble: 5 × 4
textbook.classification total num_males prop_males
<chr> <int> <int> <dbl>
1 ISI 154 73 0.474
2 NotSBI 48 12 0.25
3 NotSBI2 10 6 0.6
4 Other 13 7 0.538
5 OtherSBI 199 67 0.337
With textbook.classification*gender, genderMale p = 0.1770
No significant interaction terms
There is some imbalance of gender across the textbooks, but not enough to be significant?
# Model 5 with a textbook x gender interaction. Note `gender` is listed both
# as a main effect and inside the `*` term; R de-duplicates the main effect
# when building the design matrix, so this is harmless but redundant.
calc.fit5_interaction <- lme(cognitive.competence.change ~ gender + years.teaching.intro.stats.binned + textbook.classification * gender,
random = ~ 1|instructor, data= calc_req, na.action = na.omit)
summary(calc.fit5_interaction)Linear mixed-effects model fit by REML
Data: calc_req
AIC BIC logLik
1077.927 1134.188 -524.9636
Random effects:
Formula: ~1 | instructor
(Intercept) Residual
StdDev: 0.4228149 0.8269005
Fixed effects: cognitive.competence.change ~ gender + years.teaching.intro.stats.binned + textbook.classification * gender
Value Std.Error DF t-value
(Intercept) -0.1605701 0.4352078 408 -0.3689504
genderMale -0.1877899 0.1388554 408 -1.3524129
years.teaching.intro.stats.binned10-20 0.0433712 0.5448395 3 0.0796036
years.teaching.intro.stats.binned5-10 0.7829532 0.6320353 408 1.2387807
textbook.classificationNotSBI -0.2871487 0.5586817 3 -0.5139754
textbook.classificationNotSBI2 0.9105701 0.7342475 3 1.2401405
textbook.classificationOther -0.2716900 0.6381124 3 -0.4257713
textbook.classificationOtherSBI -0.7674213 0.6366768 408 -1.2053544
genderMale:textbook.classificationNotSBI 0.4167518 0.4204712 408 0.9911542
genderMale:textbook.classificationNotSBI2 -0.2566546 0.5515276 408 -0.4653522
genderMale:textbook.classificationOther 0.4814407 0.4805437 408 1.0018666
genderMale:textbook.classificationOtherSBI -0.0427938 0.1862951 408 -0.2297100
p-value
(Intercept) 0.7124
genderMale 0.1770
years.teaching.intro.stats.binned10-20 0.9416
years.teaching.intro.stats.binned5-10 0.2161
textbook.classificationNotSBI 0.6427
textbook.classificationNotSBI2 0.3031
textbook.classificationOther 0.6990
textbook.classificationOtherSBI 0.2288
genderMale:textbook.classificationNotSBI 0.3222
genderMale:textbook.classificationNotSBI2 0.6419
genderMale:textbook.classificationOther 0.3170
genderMale:textbook.classificationOtherSBI 0.8184
Correlation:
(Intr) gndrMl y....1 y....5 tx.NSBI
genderMale -0.121
years.teaching.intro.stats.binned10-20 -0.784 -0.025
years.teaching.intro.stats.binned5-10 -0.668 -0.086 0.544
textbook.classificationNotSBI -0.779 0.094 0.611 0.520
textbook.classificationNotSBI2 -0.593 0.072 0.465 0.396 0.462
textbook.classificationOther -0.013 0.104 -0.319 -0.009 0.010
textbook.classificationOtherSBI 0.625 0.169 -0.520 -0.980 -0.487
genderMale:textbook.classificationNotSBI 0.040 -0.330 0.008 0.029 -0.258
genderMale:textbook.classificationNotSBI2 0.030 -0.252 0.006 0.022 -0.024
genderMale:textbook.classificationOther 0.035 -0.289 0.007 0.025 -0.027
genderMale:textbook.classificationOtherSBI 0.090 -0.745 0.019 0.064 -0.070
t.NSBI2 txtb.O t.OSBI gnM:.NSBI
genderMale
years.teaching.intro.stats.binned10-20
years.teaching.intro.stats.binned5-10
textbook.classificationNotSBI
textbook.classificationNotSBI2
textbook.classificationOther 0.007
textbook.classificationOtherSBI -0.370 0.018
genderMale:textbook.classificationNotSBI -0.024 -0.034 -0.056
genderMale:textbook.classificationNotSBI2 -0.440 -0.026 -0.042 0.083
genderMale:textbook.classificationOther -0.021 -0.402 -0.049 0.095
genderMale:textbook.classificationOtherSBI -0.053 -0.078 -0.170 0.246
gM:.NSBI2 gnM:.O
genderMale
years.teaching.intro.stats.binned10-20
years.teaching.intro.stats.binned5-10
textbook.classificationNotSBI
textbook.classificationNotSBI2
textbook.classificationOther
textbook.classificationOtherSBI
genderMale:textbook.classificationNotSBI
genderMale:textbook.classificationNotSBI2
genderMale:textbook.classificationOther 0.073
genderMale:textbook.classificationOtherSBI 0.188 0.215
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-3.25105021 -0.61204041 0.01817228 0.65580869 2.99718212
Number of Observations: 423
Number of Groups: 8
Turning some vars of allYrsFinal into factors
# Recode the modeling variables as factors (allYrsFinal_factor is used by the
# ggpredict interaction plots further down)
allYrsFinal_factor <- allYrsFinal |>
  mutate(across(
    c(gender, instructor.gender, years.teaching.intro.stats.binned, instructor),
    factor
  ))
# Textbook model on the full dataset: random intercept per instructor
textbook.fit <- lme(cognitive.competence.change ~ gender + years.teaching.intro.stats.binned + textbook.classification,
random = ~ 1 | instructor, data = allYrsFinal, na.action = na.omit)
summary(textbook.fit)Linear mixed-effects model fit by REML
Data: allYrsFinal
AIC BIC logLik
31311.83 31407.41 -15642.92
Random effects:
Formula: ~1 | instructor
(Intercept) Residual
StdDev: 0.2128222 0.9281217
Fixed effects: cognitive.competence.change ~ gender + years.teaching.intro.stats.binned + textbook.classification
Value Std.Error DF t-value
(Intercept) 0.11953057 0.04364982 11302 2.738398
genderMale -0.09575831 0.01841568 11302 -5.199825
years.teaching.intro.stats.binned10-20 -0.02099960 0.04517081 11302 -0.464893
years.teaching.intro.stats.binned20-30 -0.02374339 0.06811487 11302 -0.348579
years.teaching.intro.stats.binned30+ 0.04461754 0.14446465 224 0.308847
years.teaching.intro.stats.binned5-10 0.03385650 0.04395613 11302 0.770234
textbook.classificationISI1st -0.15001502 0.05493475 11302 -2.730786
textbook.classificationNotSBI -0.09933388 0.04956882 11302 -2.003959
textbook.classificationNotSBI2 -0.20584842 0.06579719 11302 -3.128529
textbook.classificationOther -0.06546614 0.10042603 11302 -0.651884
textbook.classificationOtherSBI -0.11191696 0.04724361 11302 -2.368933
p-value
(Intercept) 0.0062
genderMale 0.0000
years.teaching.intro.stats.binned10-20 0.6420
years.teaching.intro.stats.binned20-30 0.7274
years.teaching.intro.stats.binned30+ 0.7577
years.teaching.intro.stats.binned5-10 0.4412
textbook.classificationISI1st 0.0063
textbook.classificationNotSBI 0.0451
textbook.classificationNotSBI2 0.0018
textbook.classificationOther 0.5145
textbook.classificationOtherSBI 0.0179
Correlation:
(Intr) gndrMl y....1 y....2 y....3
genderMale -0.153
years.teaching.intro.stats.binned10-20 -0.412 0.002
years.teaching.intro.stats.binned20-30 -0.315 -0.002 0.333
years.teaching.intro.stats.binned30+ -0.064 0.006 0.099 0.066
years.teaching.intro.stats.binned5-10 -0.199 0.005 0.295 0.183 0.089
textbook.classificationISI1st -0.578 -0.012 0.093 0.107 -0.003
textbook.classificationNotSBI -0.660 -0.017 0.014 0.054 -0.088
textbook.classificationNotSBI2 -0.434 0.000 -0.061 -0.005 -0.010
textbook.classificationOther -0.350 0.009 0.005 0.076 0.006
textbook.classificationOtherSBI -0.734 0.002 0.168 0.142 -0.041
y....5 t.ISI1 tx.NSBI t.NSBI2 txtb.O
genderMale
years.teaching.intro.stats.binned10-20
years.teaching.intro.stats.binned20-30
years.teaching.intro.stats.binned30+
years.teaching.intro.stats.binned5-10
textbook.classificationISI1st -0.082
textbook.classificationNotSBI -0.107 0.493
textbook.classificationNotSBI2 -0.164 0.369 0.424
textbook.classificationOther 0.073 0.236 0.281 0.191
textbook.classificationOtherSBI -0.052 0.587 0.638 0.415 0.320
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-5.33581741 -0.58706808 0.03741069 0.62501286 5.81740605
Number of Observations: 11537
Number of Groups: 226
Interaction plot Lines look pretty parallel
# Refit the textbook model on the factor-coded data for plotting
textbook.fit_fct <- lme(cognitive.competence.change ~ gender + years.teaching.intro.stats.binned + textbook.classification,
random = ~ 1|instructor, data= allYrsFinal_factor, na.action=na.omit)
# Predicted competence change by gender and textbook for 8 sampled instructors
textbook_plot <- ggpredict(textbook.fit_fct,
terms = c("gender", "textbook.classification", "instructor [sample=8]"),
type = "re")
ggplot(textbook_plot, aes(x=x, y=predicted, group = group, color = group))+
geom_line() +
labs(y= "Cognitive Competence Change", x="Female = 0, Male = 1", color = "Textbook")Proportion of males for each textbook
# Gender balance per textbook across the full dataset; na.rm = TRUE keeps
# missing genders out of the male count (they still count toward `total`)
allYrsFinal |>
  group_by(textbook.classification) |>
  summarize(
    total = n(),
    num_males = sum(gender == "Male", na.rm = TRUE),
    prop_males = num_males / total
  ) # A tibble: 7 × 4
textbook.classification total num_males prop_males
<chr> <int> <int> <dbl>
1 ISI 2381 808 0.339
2 ISI1st 860 344 0.4
3 NotSBI 3966 1465 0.369
4 NotSBI2 796 273 0.343
5 Other 266 91 0.342
6 OtherSBI 3450 1206 0.350
7 <NA> 11 2 0.182
Gender, 5-10 years, 10-20 years, NotSBI, OtherSBI genderMale:textbook.classificationNotSBI, genderMale:textbook.classificationOther significant
# Same textbook model, but the gender effect is allowed to vary by instructor
# (random intercept + random gender slope)
textbook.fit_randGender <- lme(cognitive.competence.change ~ gender + years.teaching.intro.stats.binned + textbook.classification,
random = ~ 1+gender|instructor, data= allYrsFinal, na.action=na.omit)
summary(textbook.fit_randGender)Linear mixed-effects model fit by REML
Data: allYrsFinal
AIC BIC logLik
31308.45 31418.74 -15639.23
Random effects:
Formula: ~1 + gender | instructor
Structure: General positive-definite, Log-Cholesky parametrization
StdDev Corr
(Intercept) 0.23472938 (Intr)
genderMale 0.09207456 -0.693
Residual 0.92720614
Fixed effects: cognitive.competence.change ~ gender + years.teaching.intro.stats.binned + textbook.classification
Value Std.Error DF t-value
(Intercept) 0.10810141 0.04387651 11302 2.463765
genderMale -0.09559130 0.02096547 11302 -4.559463
years.teaching.intro.stats.binned10-20 -0.02626291 0.04453107 11302 -0.589766
years.teaching.intro.stats.binned20-30 -0.02011840 0.06639049 11302 -0.303031
years.teaching.intro.stats.binned30+ 0.05507746 0.13890797 224 0.396503
years.teaching.intro.stats.binned5-10 0.03594135 0.04355139 11302 0.825263
textbook.classificationISI1st -0.13506991 0.05449194 11302 -2.478713
textbook.classificationNotSBI -0.08253138 0.04879886 11302 -1.691256
textbook.classificationNotSBI2 -0.19268328 0.06520312 11302 -2.955124
textbook.classificationOther -0.06250709 0.09996904 11302 -0.625264
textbook.classificationOtherSBI -0.10260441 0.04674183 11302 -2.195130
p-value
(Intercept) 0.0138
genderMale 0.0000
years.teaching.intro.stats.binned10-20 0.5554
years.teaching.intro.stats.binned20-30 0.7619
years.teaching.intro.stats.binned30+ 0.6921
years.teaching.intro.stats.binned5-10 0.4092
textbook.classificationISI1st 0.0132
textbook.classificationNotSBI 0.0908
textbook.classificationNotSBI2 0.0031
textbook.classificationOther 0.5318
textbook.classificationOtherSBI 0.0282
Correlation:
(Intr) gndrMl y....1 y....2 y....3
genderMale -0.244
years.teaching.intro.stats.binned10-20 -0.401 -0.013
years.teaching.intro.stats.binned20-30 -0.299 -0.041 0.333
years.teaching.intro.stats.binned30+ -0.056 -0.027 0.102 0.070
years.teaching.intro.stats.binned5-10 -0.198 0.000 0.295 0.186 0.091
textbook.classificationISI1st -0.567 0.001 0.092 0.105 -0.003
textbook.classificationNotSBI -0.646 -0.009 0.008 0.044 -0.088
textbook.classificationNotSBI2 -0.430 0.017 -0.059 -0.006 -0.010
textbook.classificationOther -0.345 0.015 0.008 0.076 0.006
textbook.classificationOtherSBI -0.720 -0.001 0.169 0.139 -0.044
y....5 t.ISI1 tx.NSBI t.NSBI2 txtb.O
genderMale
years.teaching.intro.stats.binned10-20
years.teaching.intro.stats.binned20-30
years.teaching.intro.stats.binned30+
years.teaching.intro.stats.binned5-10
textbook.classificationISI1st -0.082
textbook.classificationNotSBI -0.106 0.490
textbook.classificationNotSBI2 -0.155 0.367 0.422
textbook.classificationOther 0.074 0.234 0.279 0.189
textbook.classificationOtherSBI -0.050 0.581 0.636 0.412 0.317
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-5.40748941 -0.58117379 0.03151791 0.62807530 5.84705080
Number of Observations: 11537
Number of Groups: 226
# Refit the random-gender-slope textbook model on factor-coded data for plotting
textbook.fit_randGender_fct <- lme(cognitive.competence.change ~ gender + years.teaching.intro.stats.binned + textbook.classification,
random = ~ 1+gender|instructor, data= allYrsFinal_factor, na.action=na.omit)
# Predicted competence change by gender and textbook for 8 sampled instructors
textbook_plot2 <- ggpredict(textbook.fit_randGender_fct,
terms = c("gender", "textbook.classification", "instructor [sample=8]"),
type = "re")
ggplot(textbook_plot2, aes(x=x, y=predicted, group = group, color = group))+
geom_line() +
labs(y= "Cognitive Competence Change", x="Female = 0, Male = 1", color = "Textbook")Regular regression: gender*textbook.classification
# Ordinary least squares with a gender x textbook interaction; unlike the
# lme fits above, this ignores the instructor-level grouping structure
reg_text.fit <- lm(
  cognitive.competence.change ~ gender * textbook.classification +
    years.teaching.intro.stats.binned + instructor.gender,
  data = allYrsFinal
)
summary(reg_text.fit)
Call:
lm(formula = cognitive.competence.change ~ gender * textbook.classification +
years.teaching.intro.stats.binned + instructor.gender, data = allYrsFinal)
Residuals:
Min 1Q Median 3Q Max
-5.4433 -0.5608 0.0501 0.5848 5.3480
Coefficients:
Estimate Std. Error t value
(Intercept) 0.28293 0.03003 9.423
genderMale -0.21058 0.04161 -5.061
textbook.classificationISI1st -0.27745 0.04926 -5.632
textbook.classificationNotSBI -0.29761 0.03209 -9.275
textbook.classificationNotSBI2 -0.38358 0.04858 -7.896
textbook.classificationOther -0.09346 0.07693 -1.215
textbook.classificationOtherSBI -0.20103 0.03265 -6.158
years.teaching.intro.stats.binned10-20 -0.03542 0.02457 -1.441
years.teaching.intro.stats.binned20-30 -0.06443 0.02772 -2.324
years.teaching.intro.stats.binned30+ 0.17214 0.05057 3.404
years.teaching.intro.stats.binned5-10 0.02141 0.02665 0.803
instructor.genderMale -0.06794 0.01930 -3.521
genderMale:textbook.classificationISI1st 0.23176 0.07801 2.971
genderMale:textbook.classificationNotSBI 0.18186 0.05202 3.496
genderMale:textbook.classificationNotSBI2 0.13923 0.08210 1.696
genderMale:textbook.classificationOther 0.02400 0.13056 0.184
genderMale:textbook.classificationOtherSBI 0.07203 0.05368 1.342
Pr(>|t|)
(Intercept) < 0.0000000000000002 ***
genderMale 0.00000042408515175 ***
textbook.classificationISI1st 0.00000001825396782 ***
textbook.classificationNotSBI < 0.0000000000000002 ***
textbook.classificationNotSBI2 0.00000000000000315 ***
textbook.classificationOther 0.224441
textbook.classificationOtherSBI 0.00000000076324727 ***
years.teaching.intro.stats.binned10-20 0.149506
years.teaching.intro.stats.binned20-30 0.020128 *
years.teaching.intro.stats.binned30+ 0.000667 ***
years.teaching.intro.stats.binned5-10 0.421730
instructor.genderMale 0.000432 ***
genderMale:textbook.classificationISI1st 0.002977 **
genderMale:textbook.classificationNotSBI 0.000475 ***
genderMale:textbook.classificationNotSBI2 0.089946 .
genderMale:textbook.classificationOther 0.854134
genderMale:textbook.classificationOtherSBI 0.179669
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.9458 on 11520 degrees of freedom
(193 observations deleted due to missingness)
Multiple R-squared: 0.01564, Adjusted R-squared: 0.01427
F-statistic: 11.44 on 16 and 11520 DF, p-value: < 0.00000000000000022
# Base-graphics interaction plot: mean competence change by gender, one trace
# per textbook classification.
num_classes <- length(unique(allYrsFinal$textbook.classification))
# brewer.pal() accepts n in 3..8 and "Dark2" provides up to 8 colors.
# The previous cap of 6 recycled colors across the 7 classifications
# (including NA), so clamp into the palette's actual range instead.
textbook_colors <- brewer.pal(max(3, min(num_classes, 8)), "Dark2")
interaction.plot(x.factor=allYrsFinal$gender, trace.factor = allYrsFinal$textbook.classification, response = allYrsFinal$cognitive.competence.change,
xlab = "Gender",
ylab = "Cognitive Competence Change",
trace.label = "Textbook Classification",
col = textbook_colors)Random effects
Fixed effects
# Does cognitive competence change predict achievement gain (ach.gain.24)?
# NOTE(review): no na.action is supplied here, unlike the other fits — lme
# defaults to na.fail, so this relies on calc_req_NA being complete on these
# variables (it ran with 424 obs below); confirm that is intentional.
calc.fit6 <- lme(ach.gain.24 ~ gender + years.teaching.intro.stats.binned + cognitive.competence.change,
random = ~ 1|instructor, data= calc_req_NA)
summary(calc.fit6)Linear mixed-effects model fit by REML
Data: calc_req_NA
AIC BIC logLik
202.4965 234.7803 -93.24824
Random effects:
Formula: ~1 | instructor
(Intercept) Residual
StdDev: 0.1802149 0.2890601
Fixed effects: ach.gain.24 ~ gender + years.teaching.intro.stats.binned + cognitive.competence.change
Value Std.Error DF t-value
(Intercept) 0.20507282 0.08745810 412 2.344812
genderMale 0.06597470 0.03061753 412 2.154801
genderNA/Other -0.27490222 0.28997011 412 -0.948036
years.teaching.intro.stats.binned10-20 0.04498717 0.14182863 6 0.317194
years.teaching.intro.stats.binned5-10 -0.12786515 0.03656786 412 -3.496654
cognitive.competence.change 0.03576860 0.01713373 412 2.087614
p-value
(Intercept) 0.0195
genderMale 0.0318
genderNA/Other 0.3437
years.teaching.intro.stats.binned10-20 0.7618
years.teaching.intro.stats.binned5-10 0.0005
cognitive.competence.change 0.0374
Correlation:
(Intr) gndrMl gnNA/O y....1 y....5
genderMale -0.170
genderNA/Other -0.003 0.036
years.teaching.intro.stats.binned10-20 -0.599 0.000 -0.002
years.teaching.intro.stats.binned5-10 -0.152 0.026 -0.038 0.091
cognitive.competence.change -0.011 0.093 0.000 0.022 -0.007
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-5.05758716 -0.54758474 0.04772822 0.64230049 2.74008911
Number of Observations: 424
Number of Groups: 8
Interaction between gender and pre?
# Reverse direction: achievement gain as a predictor of competence change,
# with random intercept + random gender slope per instructor
calc.fit7 <- lme(cognitive.competence.change ~ gender + ach.gain.24 + textbook.classification,
random = ~ 1+gender|instructor, data= calc_req, na.action=na.omit)
summary(calc.fit7)Linear mixed-effects model fit by REML
Data: calc_req
AIC BIC logLik
1074.348 1118.685 -526.1738
Random effects:
Formula: ~1 + gender | instructor
Structure: General positive-definite, Log-Cholesky parametrization
StdDev Corr
(Intercept) 0.3766085 (Intr)
genderMale 0.1875762 -0.337
Residual 0.8210504
Fixed effects: cognitive.competence.change ~ gender + ach.gain.24 + textbook.classification
Value Std.Error DF t-value p-value
(Intercept) -0.1079576 0.2192687 412 -0.4923532 0.6227
genderMale -0.1134483 0.1385524 412 -0.8188110 0.4134
ach.gain.24 0.2855871 0.1393892 412 2.0488463 0.0411
textbook.classificationNotSBI -0.2710299 0.3512820 4 -0.7715451 0.4834
textbook.classificationNotSBI2 0.6282174 0.4833295 4 1.2997705 0.2635
textbook.classificationOther -0.0531318 0.4687671 4 -0.1133437 0.9152
textbook.classificationOtherSBI 0.0279503 0.1061041 412 0.2634231 0.7924
Correlation:
(Intr) gndrMl ac..24 tx.NSBI t.NSBI2 txtb.O
genderMale -0.369
ach.gain.24 -0.223 -0.050
textbook.classificationNotSBI -0.563 0.095 0.098
textbook.classificationNotSBI2 -0.383 -0.005 0.081 0.236
textbook.classificationOther -0.407 0.008 0.113 0.248 0.180
textbook.classificationOtherSBI -0.141 0.002 0.189 0.079 0.058 0.066
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-3.193750602 -0.602104601 -0.006958229 0.666769643 3.099360142
Number of Observations: 423
Number of Groups: 8
# Scatter of pre score vs change, colored by gender, shaped by textbook
calc_req |>
ggplot(aes(x= cognitive.competence.pre, y = cognitive.competence.change, color=gender, shape = textbook.classification)) +
geom_point(alpha=0.5)calc_cols <- calc_req |>
select(textbook.classification, cognitive.competence.change, cognitive.competence.pre) |>
filter(!is.na(textbook.classification), !is.na(cognitive.competence.change), !is.na(cognitive.competence.pre))
# one hot encoding
# NOTE(review): without explicit id_cols, pivot_wider uses the remaining
# columns (change, pre) as row identifiers, so rows sharing the same pair
# collapse and values_fn = length COUNTS them. The cluster means below
# exceed 1 (e.g. OtherSBI 2.05), so this is not strictly one-hot — confirm
# whether that is intended before interpreting the clusters.
calc_cols <- calc_cols |>
mutate(textbook.classification = factor(textbook.classification))|>
pivot_wider(names_from = textbook.classification, values_from = textbook.classification,
values_fn = length, values_fill = list(textbook.classification = 0))
# Seed fixed so the k-means assignment is reproducible
set.seed(123)
km.out <- kmeans(calc_cols, centers = 4, nstart = 20)
km.outK-means clustering with 4 clusters of sizes 85, 56, 41, 53
Cluster means:
cognitive.competence.change cognitive.competence.pre NotSBI2 NotSBI
1 -1.02941176 5.825490 0.01176471 0.3176471
2 0.88690476 4.244048 0.07142857 0.1428571
3 -0.39430894 4.528455 0.00000000 0.1463415
4 -0.01886792 4.858491 0.09433962 0.1320755
OtherSBI ISI Other
1 0.4941176 0.4235294 0.04705882
2 0.7500000 0.2857143 0.07142857
3 2.0487805 0.2926829 0.02439024
4 0.5660377 1.6981132 0.07547170
Clustering vector:
[1] 4 2 2 1 4 4 4 4 2 1 3 1 1 1 4 1 1 2 1 1 3 3 2 2 2 2 1 3 4 1 1 4 1 1 1 3 1
[38] 3 1 3 3 3 2 1 2 3 2 3 2 2 3 1 2 1 2 2 1 3 4 3 2 1 1 2 3 1 1 1 4 2 4 1 1 4
[75] 1 2 1 3 1 3 2 3 4 4 4 3 3 1 2 4 3 1 4 2 3 3 1 1 1 3 2 1 4 1 1 2 3 1 3 1 4
[112] 3 1 3 3 3 3 4 1 4 1 1 1 3 1 2 1 1 2 2 4 1 1 1 2 3 4 2 2 2 2 4 2 2 3 3 3 2
[149] 3 4 1 1 1 3 2 4 1 4 2 2 3 2 3 2 2 1 1 3 2 1 2 4 4 1 4 4 4 1 1 1 1 4 1 1 2
[186] 1 1 1 1 2 4 4 1 2 1 4 4 2 4 4 2 4 4 4 1 2 4 4 4 4 4 2 1 2 1 1 4 1 2 1 4 1
[223] 1 1 4 1 1 1 1 4 4 2 2 2 4
Within cluster sum of squares by cluster:
[1] 165.78366 83.94048 72.24661 104.71593
(between_SS / total_SS = 47.1 %)
Available components:
[1] "cluster" "centers" "totss" "withinss" "tot.withinss"
[6] "betweenss" "size" "iter" "ifault"
# Decide how many clusters to look at
n_clusters <- 10
# Seed fixed so the k-means runs are reproducible
set.seed(123)
# Total within-cluster sum of squares for k = 1..n_clusters, fit in order so
# the RNG sequence matches a sequential loop
wss <- vapply(
  seq_len(n_clusters),
  function(k) kmeans(calc_cols, centers = k, nstart = 20)$tot.withinss,
  numeric(1)
)
# Produce a scree (elbow) plot of wss against the number of clusters
wss_df <- tibble(clusters = 1:n_clusters, wss = wss)
scree_plot <- ggplot(wss_df, aes(x = clusters, y = wss, group = 1)) +
  geom_point(size = 4) +
  geom_line() +
  scale_x_continuous(breaks = c(2, 4, 6, 8, 10)) +
  xlab('Number of clusters')
scree_plotkm.out <- kmeans(calc_cols, centers = 5, nstart = 20)
calc_cols$cluster_id <- factor(km.out$cluster)
ggplot(calc_cols, aes(x= cognitive.competence.pre, y=cognitive.competence.change, color = cluster_id)) +
geom_point(alpha = 0.5) Compare with only binary SBI
unique(calc_req$textbook.classification)[1] "NotSBI2" "NotSBI" "OtherSBI" "ISI" "Other"
# Collapse the five textbook classifications into a binary SBI / Not SBI
# indicator (plus "Other", which stays its own category)
calc_req <- calc_req |>
  mutate(textbookSBI = case_when(
    textbook.classification %in% c("ISI", "OtherSBI") ~ "SBI",
    textbook.classification %in% c("NotSBI", "NotSBI2") ~ "Not SBI",
    textbook.classification == "Other" ~ "Other"
  ))
calc_req |>
ggplot(aes(x= cognitive.competence.pre, y = cognitive.competence.change, color=gender, shape = textbookSBI)) +
geom_point(alpha=0.5)calc_req |>
ggplot(aes(x=gender, y = cognitive.competence.change)) +
geom_boxplot() +
geom_violin(alpha = 0.5, color = "olivedrab")calc_req |>
#drop_na(cognitive.competence.change) |>
ggplot(aes(x=instructor.gender , y = cognitive.competence.change)) +
geom_boxplot() +
geom_violin(alpha = 0.5, color = "olivedrab")calc_req |>
ggplot(aes(x= years.teaching.experience, y = cognitive.competence.change)) +
geom_point() # not linearcalc_req |>
ggplot(aes(x= years.teaching.experience.binned, y = cognitive.competence.change)) +
geom_boxplot() +
geom_violin(alpha = 0.5, color = "olivedrab")calc_req|>
group_by(institution) |>
mutate(mean_change = mean(cognitive.competence.change)) |>
ungroup()|>
ggplot(aes(x= institution, y = mean_change)) +
geom_boxplot()calc_req|>
ggplot(aes(x=gender, y=cognitive.competence.change)) +
geom_boxplot() + geom_violin(alpha = 0.5, color = "orchid")count(calc_req) # 513# A tibble: 1 × 1
n
<int>
1 423
calc_req |>
filter(is.na(gender))|> # 2 gender NAs
count()# A tibble: 1 × 1
n
<int>
1 0
non_calc <- allYrsFinal|>
filter(math.prereq != "Calculus")
non_calc|>
ggplot(aes(x=gender, y=cognitive.competence.change)) +
geom_boxplot() + geom_violin(alpha = 0.5, color = "orchid")count(non_calc) # 13704 # A tibble: 1 × 1
n
<int>
1 11115
non_calc|>
filter(is.na(gender))|> # 115 gender NAs
count()# A tibble: 1 × 1
n
<int>
1 62
# allYrsFinal|>
# select(institution)
#unique(allYrsFinal$institution)
# Mean cognitive-competence change per institution among Baccalaureate
# Colleges, sorted ascending (worst first).
allYrsFinal |>
filter(carnegie.classification == "Baccalaureate College") |>
group_by(factor(institution))|>
summarize(mean_cog_comp_change = mean(cognitive.competence.change))|>
arrange(mean_cog_comp_change)# A tibble: 27 × 2
`factor(institution)` mean_cog_comp_change
<fct> <dbl>
1 111 -1.22
2 124 -0.396
3 8 -0.281
4 119 -0.269
5 98 -0.236
6 122 -0.183
7 108 -0.176
8 15 -0.124
9 71 -0.0816
10 99 -0.0794
# ℹ 17 more rows
# worst: 40 : -3.2
# NOTE(review): the "worst: 40" / "best: 28" notes do not match the rendered
# table above (worst shown is institution 111 at -1.22) — they appear to come
# from an earlier version of the data; verify before citing.
# Drill into institution 40: mean change per instructor.
# best: 28 : 0.875allYrsFinal|>
filter(institution == 40)|>
group_by(factor(instructor))|>
summarize(mean_cog_comp_change = mean(cognitive.competence.change))|>
arrange(mean_cog_comp_change)# A tibble: 1 × 2
`factor(instructor)` mean_cog_comp_change
<fct> <dbl>
1 Instructor195 0.115
# Drill into Instructor50.
# NOTE(review): every Instructor50 result below is 0 rows — this instructor
# evidently has no records after the pre/post filtering applied at the top of
# the document. The inline comments ("NotSBI", "38:17 gender split",
# "female instructor") describe an earlier data version; treat as stale.
allYrsFinal|>
filter(instructor == "Instructor50")|>
group_by(factor(section))|>
summarize(mean_cog_comp_change = mean(cognitive.competence.change))|>
arrange(mean_cog_comp_change)# A tibble: 0 × 2
# ℹ 2 variables: factor(section) <fct>, mean_cog_comp_change <dbl>
instr_50 <- allYrsFinal|>
filter(instructor == "Instructor50")
instr_50["textbook.classification"] # NotSBI# A tibble: 0 × 1
# ℹ 1 variable: textbook.classification <chr>
instr_50 |>
ggplot(aes(x=gender, y=cognitive.competence.change)) +
geom_boxplot() + geom_violin(alpha = 0.5, color = "orchid")instr_50 |>
dplyr::select(ach.gain.24) # all NAs ?# A tibble: 0 × 1
# ℹ 1 variable: ach.gain.24 <dbl>
instr_50 |>
group_by(gender)|> # 38:17 gender split
count()# A tibble: 0 × 2
# Groups: gender [0]
# ℹ 2 variables: gender <chr>, n <int>
instr_50["instructor.gender"] # female instructor# A tibble: 0 × 1
# ℹ 1 variable: instructor.gender <chr>
# Drill into institution 28: mean change per instructor (only Instructor33
# remains after filtering).
allYrsFinal |>
filter(institution == 28)|>
group_by(factor(instructor))|>
summarize(mean_cog_comp_change = mean(cognitive.competence.change))|>
arrange(mean_cog_comp_change)# A tibble: 1 × 2
`factor(instructor)` mean_cog_comp_change
<fct> <dbl>
1 Instructor33 0.644
# Instructor33: mean change per section — the two sections look similar.
allYrsFinal |>
filter(instructor == "Instructor33")|>
group_by(factor(section))|>
summarize(mean_cog_comp_change = mean(cognitive.competence.change))|> # sections pretty similar
arrange(mean_cog_comp_change)# A tibble: 2 × 2
`factor(section)` mean_cog_comp_change
<fct> <dbl>
1 1 0.545
2 2 0.747
instr_33 <- allYrsFinal |>
filter(instructor == "Instructor33")
# Textbook used: NotSBI (confirmed by the rendered column below).
instr_33["textbook.classification"] # NotSBI# A tibble: 51 × 1
textbook.classification
<chr>
1 NotSBI
2 NotSBI
3 NotSBI
4 NotSBI
5 NotSBI
6 NotSBI
7 NotSBI
8 NotSBI
9 NotSBI
10 NotSBI
# ℹ 41 more rows
instr_33 |>
ggplot(aes(x=gender, y=cognitive.competence.change)) +
# Mean achievement gain among non-missing ach.gain.24 values.
geom_boxplot() + geom_violin(alpha = 0.5, color = "orchid")instr_33 |>
drop_na(ach.gain.24)|>
summarize(mean(ach.gain.24)) # 0.1689703 # A tibble: 1 × 1
`mean(ach.gain.24)`
<dbl>
1 0.169
# NOTE(review): inline comment says 39:21 split, but the rendered counts are
# 35 Female : 16 Male — stale comment from an earlier data version.
instr_33 |>
group_by(gender)|> # 39:21 gender split
count()# A tibble: 2 × 2
# Groups: gender [2]
gender n
<chr> <int>
1 Female 35
2 Male 16
instr_33["instructor.gender"] # female instructor# A tibble: 51 × 1
instructor.gender
<chr>
1 Female
2 Female
3 Female
4 Female
5 Female
6 Female
7 Female
8 Female
9 Female
10 Female
# ℹ 41 more rows
# Students (16-17) belonging to the 10 sections with the HIGHEST mean change
# in cognitive competence. The per-section mean is kept as a column
# (mean_cog_comp_change) so downstream code is unaffected.
#
# BUG FIX: the original called slice_max() while still grouped by
# instructor.section, which keeps up to 10 rows *per section* (the rendered
# output shows 4,411 rows — essentially everyone), not the top 10 sections.
# After ungrouping, dense_rank() on the (section-constant) mean keeps exactly
# the sections with the 10 highest means. Sections whose mean is NA get an NA
# rank and are dropped.
top10 <- allYrsFinal|>
filter(year == "16-17") |>
group_by(instructor.section)|>
mutate(mean_cog_comp_change = mean(cognitive.competence.change))|>
ungroup()|>
filter(dense_rank(desc(mean_cog_comp_change)) <= 10)
top10# A tibble: 4,411 × 298
instructor section instructor.section opt.out.pre opt.out.post test.time.pre
<chr> <chr> <chr> <dbl> <dbl> <dbl>
1 Instructor… 1 Instructor100_Sec… 1 1 35
2 Instructor… 1 Instructor100_Sec… 1 1 1469
3 Instructor… 1 Instructor100_Sec… 1 1 29
4 Instructor… 1 Instructor100_Sec… 1 1 51
5 Instructor… 1 Instructor100_Sec… 1 1 27
6 Instructor… 1 Instructor100_Sec… 1 1 37
7 Instructor… 1 Instructor100_Sec… 1 1 35
8 Instructor… 1 Instructor100_Sec… 1 1 34
9 Instructor… 1 Instructor100_Sec… 1 1 25
10 Instructor… 1 Instructor100_Sec… 1 1 39
# ℹ 4,401 more rows
# ℹ 292 more variables: test.time.post <dbl>, q16a.pre.c.1415 <chr>,
# q16b.pre.c.1415 <chr>, q16c.pre.c.1415 <chr>, q16a.pre.c <chr>,
# q16b.pre.c <chr>, q16c.pre.c <chr>, q16d.pre.c <chr>, q16.pre.c <dbl>,
# q17.pre.c <chr>, q18.pre.c <chr>, q19.pre.c <chr>, q20.pre.c <chr>,
# q21.pre.c <chr>, q22.pre.c <chr>, q23.pre.c <chr>, q24.pre.c <chr>,
# q25.pre.c <chr>, q26.pre.c <chr>, q27.pre.c <chr>, q28.pre.c <chr>, …
Long format
# The six cognitive-competence survey items, pre and post versions.
competenceFull <- c("q6e.pre.a", "q6e.post.a", "q7a.pre.a", "q7a.post.a", "q8f.pre.a", "q8f.post.a", "q9a.pre.a", "q9a.post.a", "q9b.pre.a", "q9b.post.a", "q9e.pre.a", "q9e.post.a")
# Long format: one row per student x item (pre and post become separate rows).
questions_long_top10 <- top10 |>
select(all_of(competenceFull), gender, ach.gain.24) |>
pivot_longer(cols=all_of(competenceFull),
names_to = "question",
values_to = "score")
# Tag each long row as pre/post and extract the item id (e.g. "q6e").
questions_long_top10 <- questions_long_top10 |>
mutate(row_id = row_number(),
preOrPost = case_when(str_detect(question, "pre") ~ "pre",
str_detect(question, "post") ~ "post"),
question_num = str_extract(question, "q\\d+[a-z]"))
# Widen back to pre/post columns. Because row_id is unique per long row, each
# widened row has only pre OR post filled; fill() then copies the missing half
# from the adjacent row of the same student (pre/post rows alternate).
# NOTE(review): this produces each student-item pair TWICE (once from the pre
# row, once from the post row), and when a student's value is NA, fill() can
# pull a value from a *different* student within the same gender/question
# group — consider a names_pattern pivot_wider keyed on a student id instead.
questions_wide_top10 <- questions_long_top10 |>
pivot_wider(names_from = preOrPost, values_from = score, id_cols = c(row_id, gender, question_num)) |>
select(-row_id) |>
group_by(gender, question_num) |>
fill(pre, post, .direction = "downup") |>
ungroup()
# Per-item change score (post minus pre).
questions_change_top10 <- questions_wide_top10 |>
mutate(change = post - pre) Change Per Question by Gender Boxplots (top 10)
# Top-10 sections: per-item change distributions by gender.
questions_change_top10|>
ggplot(aes(x=question_num, y=change, color = gender)) +
geom_boxplot() +
labs(title="Top 10 sections: Change Per Question by Gender Boxplots")Change Per Question by Gender Line Graph
# Top-10 sections: mean per-item change by gender as a line graph.
questions_change_top10|>
mutate(question_num = factor(question_num, levels = unique(question_num))) |>
group_by(gender, question_num) |>
summarize(mean_change = mean(change)) |>
ungroup() |>
ggplot(aes(x=question_num, y=mean_change, color = gender, group = gender)) +
# low10: students from the 10 sections with the LOWEST mean change.
# BUG FIX: the original called slice_min() while still grouped by
# instructor.section, keeping up to 10 rows per section (rendered output:
# 4,411 rows) instead of the 10 lowest-mean sections. Rank the
# section-constant means after ungrouping instead; NA means are dropped.
geom_line() low10 <- allYrsFinal|>
filter(year == "16-17") |>
group_by(instructor.section)|>
mutate(mean_cog_comp_change = mean(cognitive.competence.change))|>
ungroup()|>
filter(dense_rank(mean_cog_comp_change) <= 10)
low10# A tibble: 4,411 × 298
instructor section instructor.section opt.out.pre opt.out.post test.time.pre
<chr> <chr> <chr> <dbl> <dbl> <dbl>
1 Instructor… 1 Instructor100_Sec… 1 1 35
2 Instructor… 1 Instructor100_Sec… 1 1 1469
3 Instructor… 1 Instructor100_Sec… 1 1 29
4 Instructor… 1 Instructor100_Sec… 1 1 51
5 Instructor… 1 Instructor100_Sec… 1 1 27
6 Instructor… 1 Instructor100_Sec… 1 1 37
7 Instructor… 1 Instructor100_Sec… 1 1 35
8 Instructor… 1 Instructor100_Sec… 1 1 34
9 Instructor… 1 Instructor100_Sec… 1 1 25
10 Instructor… 1 Instructor100_Sec… 1 1 39
# ℹ 4,401 more rows
# ℹ 292 more variables: test.time.post <dbl>, q16a.pre.c.1415 <chr>,
# q16b.pre.c.1415 <chr>, q16c.pre.c.1415 <chr>, q16a.pre.c <chr>,
# q16b.pre.c <chr>, q16c.pre.c <chr>, q16d.pre.c <chr>, q16.pre.c <dbl>,
# q17.pre.c <chr>, q18.pre.c <chr>, q19.pre.c <chr>, q20.pre.c <chr>,
# q21.pre.c <chr>, q22.pre.c <chr>, q23.pre.c <chr>, q24.pre.c <chr>,
# q25.pre.c <chr>, q26.pre.c <chr>, q27.pre.c <chr>, q28.pre.c <chr>, …
Long format
# Long format for the lowest-10 sections: one row per student x item.
questions_long_low10 <- low10 |>
select(all_of(competenceFull), gender, ach.gain.24) |>
pivot_longer(cols=all_of(competenceFull),
names_to = "question",
values_to = "score")
# Tag each long row as pre/post and extract the item id (e.g. "q6e").
questions_long_low10 <- questions_long_low10 |>
mutate(row_id = row_number(),
preOrPost = case_when(str_detect(question, "pre") ~ "pre",
str_detect(question, "post") ~ "post"),
question_num = str_extract(question, "q\\d+[a-z]"))
# Widen back to pre/post columns; fill() pairs each row with its neighbor.
# NOTE(review): same hazards as the top-10 version — each student-item pair is
# duplicated, and fill() can copy values across students when one is NA.
questions_wide_low10 <- questions_long_low10 |>
pivot_wider(names_from = preOrPost, values_from = score, id_cols = c(row_id, gender, question_num)) |>
select(-row_id) |>
group_by(gender, question_num) |>
fill(pre, post, .direction = "downup") |>
ungroup()
# Per-item change score (post minus pre).
questions_change_low10 <- questions_wide_low10 |>
mutate(change = post - pre) Change Per Question by Gender Boxplots (lowest 10)
# Lowest-10 sections: per-item change distributions by gender.
# BUG FIX: this plot was drawn from questions_change_top10 (copy-paste from
# the top-10 section) despite its "Lowest 10 sections" title — use the
# low-10 data instead.
questions_change_low10|>
ggplot(aes(x=question_num, y=change, color = gender)) +
geom_boxplot() +
labs(title="Lowest 10 sections: Change Per Question by Gender Boxplots")Change Per Question by Gender Line Graph (lowest 10)
# Lowest-10 sections: mean per-item change by gender as a line graph.
# BUG FIX: same copy-paste issue as the boxplot above — the original plotted
# questions_change_top10; use the low-10 data.
questions_change_low10|>
mutate(question_num = factor(question_num, levels = unique(question_num))) |>
group_by(gender, question_num) |>
summarize(mean_change = mean(change)) |>
ungroup() |>
ggplot(aes(x=question_num, y=mean_change, color = gender, group = gender)) +
# Sanity check: no rows carry the "ISCAM" textbook classification.
geom_line() allYrsFinal|>
filter(textbook.classification == "ISCAM")# A tibble: 0 × 297
# ℹ 297 variables: instructor <chr>, section <chr>, instructor.section <chr>,
# opt.out.pre <dbl>, opt.out.post <dbl>, test.time.pre <dbl>,
# test.time.post <dbl>, q16a.pre.c.1415 <chr>, q16b.pre.c.1415 <chr>,
# q16c.pre.c.1415 <chr>, q16a.pre.c <chr>, q16b.pre.c <chr>,
# q16c.pre.c <chr>, q16d.pre.c <chr>, q16.pre.c <dbl>, q17.pre.c <chr>,
# q18.pre.c <chr>, q19.pre.c <chr>, q20.pre.c <chr>, q21.pre.c <chr>,
# q22.pre.c <chr>, q23.pre.c <chr>, q24.pre.c <chr>, q25.pre.c <chr>, …